******************************************************************************
* Start from an empty session and raise the variable limit before any data are loaded.
clear all
set maxvar 30000
* Interpret the commands that follow under Stata 14 rules.
version 14
* Do not pause output at --more-- prompts; close a log left open by a previous run, if any.
set more off
capture log close

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
*cd  "Data\"

*
* Root folders for input and output data; edit these for your machine.
* Keep them WITHOUT a trailing slash: the code below always appends "/<file name>".
global MY_IN_PATH   "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"
global MY_OUT_PATH  "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"

*global MY_TEMP_PATH "..."

* File names derived from the output folder. The "/" separator is required:
* without it the names collapse into ".../Dataout.dta" and ".../Datacr_out.log".
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_out.log"


*global MY_TEMP_PATH "..."

*global MY_OUT_FILE  ${MY_OUT_PATH}out.dta
*global MY_LOG_FILE  ${MY_OUT_PATH}cr_out.log


*log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import inventor data
*************************************************************************************************
* Load the inventor file. The path is quoted so that a customized MY_IN_PATH
* containing spaces still resolves to a single file name.
import delimited using "${MY_IN_PATH}/inventor.geo.assignee.combo.disambig.tsv", clear

* Discard fields that are not used below. The original inventor_id is dropped
* because inventor_idx takes over that name further down.
drop firstname name city id inventor_id

* Keep only inventors that appear on at least two rows of the raw file.
bysort inventor_idx: drop if _N < 2

* Standardize the key names used in the rest of the script.
rename inventor_idx inventor_id
rename patno patent
compress

* Remove design patents etc.: any patent number whose first character is
* D, H, P, R or T is discarded (presumably the non-utility document types -- confirm).
generate h = substr(patent, 1, 1)
* Show the distribution of leading characters before filtering.
tabulate h
drop if inlist(h, "D", "H", "P", "R", "T")
drop h

* Convert the patent number to numeric. No force option is given, so the variable
* stays a string if any non-numeric value is left.
destring patent, replace


* Drop duplicate observations: keep one row per patent-inventor pair
* (the first row of each group after sorting on the two keys).
bysort patent inventor_id: keep if _n == 1


* Merge in the patent-level file (firm / old / new dummies, data from Maria).
* Many inventor rows map to one patent row, hence m:1. keep(match) retains only
* rows found in both files and nogenerate skips creating _merge, which is the
* same as "keep if _merge == 3" followed by "drop _merge".
* The path is quoted so that a customized MY_IN_PATH containing spaces still works.
merge m:1 patent using "${MY_IN_PATH}/patents_level_2.dta", keep(match) nogenerate

* Keep only rows whose fips_state is not the literal string "NULL".
keep if fips_state != "NULL"

* Remove fields that are not needed further on.
drop lastname country
compress

* Build a numeric state code from the first two characters of fips_state.
generate state_fips = substr(fips_state, 1, 2)

* Normalize "1." and the zero-padded single-digit codes to the bare digit
* before converting to numeric.
replace state_fips = "1" if state_fips == "1."
foreach d in 1 2 4 5 6 8 9 {
    replace state_fips = "`d'" if state_fips == "0`d'"
}

* force: any value that still is not numeric becomes missing.
destring state_fips, force replace

* Drop codes above 56. In Stata a missing value compares greater than every
* number, so rows with a missing state_fips are removed by this condition too;
* the missing() term only spells that out.
drop if state_fips > 56 | missing(state_fips)

* NOTE(review): "fips" is either an exact variable name or an abbreviation of
* another variable (e.g. fips_state) -- confirm against the input data.
drop fips

rename ayear year

* Delete duplicate inventors on the same patent in the same state:
* one row is kept per patent-state pair.
duplicates drop patent state_fips, force


* US only patents: flag rows without a state code and drop every patent that
* has at least one flagged row.
* NOTE(review): rows with a missing state_fips were already removed by the
* earlier "drop if state_fips > 56" (missing compares greater than any number),
* so this filter currently removes nothing -- confirm whether that is intended.
gen h = 1 if missing(state_fips)
* gegen comes from the user-written gtools package, which must be installed.
gegen hh = sum(h), by(patent)
drop if hh > 0
drop hh
* The helper h is left in the data on purpose so the saved file keeps the same
* set of variables as before.

compress
* replace lets the script be re-run without stopping on "file already exists";
* the path is quoted so that a customized MY_OUT_PATH containing spaces still works.
save "${MY_OUT_PATH}/inv_loc_.dta", replace
